getwd()
## [1] "C:/Users/nico/Documents/MATH 4753/Labs/Lab 8"
a = 0
b = 5
runif(10,a,b)
## [1] 2.4939054 1.5850592 2.3393595 2.7596144 3.4628957 3.0234064 2.2083166
## [8] 4.3593284 4.4497258 0.7714912
sample = c(1.3203239, 4.8155007, 0.6193721, 0.5776914, 4.7227894, 4.1488387, 3.9591730,
1.7561037, 3.436954, 3.6542493)
cat("sample:", sample, "\n")
## sample: 1.320324 4.815501 0.6193721 0.5776914 4.722789 4.148839 3.959173 1.756104 3.436954 3.654249
mu = (a+b)/2
sigmasq = ((b-a)^2)/12
sigma = sqrt(sigmasq)
cat("population mean:", mu, "\n")
## population mean: 2.5
cat("population variance:", sigmasq, "\n")
## population variance: 2.083333
xbar = mean(sample)
ssq = var(sample)
cat("sample mean:", xbar, "\n")
## sample mean: 2.9011
cat("sample variance:", ssq, "\n")
## sample variance: 2.769845
With this particular random sample, my sample mean was higher than the population mean by about 0.4 and my sample variance was higher than the population variance by about 0.7
E_T = 10*mu
V_T = 10*sigmasq
E_x = mu
V_x = sigmasq/10
cat("sum mean:", E_T, "\n")
## sum mean: 25
cat("sum variance:", V_T, "\n")
## sum variance: 20.83333
cat("mean mean:", E_x, "\n")
## mean mean: 2.5
cat("mean variance:",V_x, "\n")
## mean variance: 0.2083333
myclt=function(n,iter){
y=runif(n*iter,0,5) # A
data=matrix(y,nr=n,nc=iter,byrow=TRUE) #B
sm=apply(data,2,sum) #C
hist(sm)
sm
}
w=myclt(n=10,iter=10000) #D
mean(w)
## [1] 24.99666
var(w)
## [1] 21.23231
Line A creates y, a random uniform distribution of n*iter variables, with a minimum of 0 and a maximum of 5. Line B creates data, a matrix from the data in y with n rows and iter columns. Line C creates sm, the sum of the numbers in the columns of the data matrix. Line D uses the function myclt with n=10 and iter=10000
myclt=function(n,iter){
y=runif(n*iter,0,5) # A
data=matrix(y,nr=n,nc=iter,byrow=TRUE) #B
sm=apply(data,2,sum) #C
hist(sm/n)
sm/n
}
w=myclt(n=10,iter=10000) #D
mean(w)
## [1] 2.501533
var(w)
## [1] 0.2107113
mycltu=function(n,iter,a=0,b=10){
## r-random sample from the uniform
y=runif(n*iter,a,b)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density
ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax
## Now we can make the histogram
hist(w,freq=FALSE, ylim=c(0,ymax), main=paste("Histogram of sample mean",
"\n", "sample size= ",n,sep=""),xlab="Sample mean")
## add a density curve made from the sample distribution
lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve
curve(dnorm(x,mean=(a+b)/2,sd=(b-a)/(sqrt(12*n))),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve
## Add the density from which the samples were taken
curve(dunif(x,a,b),add=TRUE,lwd=4)
}
mycltu(n=20,iter=100000)
w=apply(data,2,mean), how does the apply function
use the 2? THe apply function uses the 2 to to find the mean of the
columns (since the order is rows, columns) of the matrix
How many terms are in w, when
mycltu(n=20,iter=100000) is called? w should have 100000
terms since the matrix has 100000 columns
curve(dnorm(x,mean=(a+b)/2 creates a normal curve
for the data
sd=(b-a)/(sqrt(12*n))),add=TRUE,col="Red",lty=2,lwd=3):
Explain why sd takes the formula as shown in the function. Because of
the formula for variance of means for a uniform distribution.
mycltu(n=1, iter=10000, a=0,b=10)
mycltu(n=2, iter=10000, a=0,b=10)
mycltu(n=3,iter=10000,a=0,b=10)
mycltu(n=5,iter=10000,a=0,b=10)
mycltu(n=10, iter=10000,a=0,b=10)
mycltu(n=30,iter=10000,a=0,b=10)
As n increases, the distribution begins to look more like a normal
distribution.
mycltb=function(n,iter,p=0.5,...){
## r-random sample from the Binomial
y=rbinom(n*iter,size=n,prob=p)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density
ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax
## Now we can make the histogram
## freq=FALSE means take a density
hist(w,freq=FALSE, ylim=c(0,ymax),
main=paste("Histogram of sample mean","\n", "sample size= ",n,sep=""),
xlab="Sample mean",...)
## add a density curve made from the sample distribution
#lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve
curve(dnorm(x,mean=n*p,sd=sqrt(p*(1-p))),add=TRUE,col="Red",lty=2,lwd=3)
}
mycltb(n=4,iter=10000,p=0.3)
mycltb(n=5,iter=10000,p=0.3)
mycltb(n=10,iter=10000,p=0.3)
mycltb(n=20,iter=10000,p=0.3)
mycltb(n=4,iter=10000,p=0.7)
mycltb(n=5,iter=10000,p=0.7)
mycltb(n=10,iter=10000,p=0.7)
mycltb(n=20,iter=10000,p=0.7)
mycltb(n=4,iter=10000,p=0.5)
mycltb(n=5,iter=10000,p=0.5)
mycltb(n=10,iter=10000,p=0.5)
mycltb(n=20,iter=10000,p=0.5)
I am not sure what to conclude, other than than the distributions seem
consistent even if p changes.
mycltp=function(n,iter,lambda=10,...){
## r-random sample from the Poisson
y=rpois(n*iter,lambda=lambda)
## Place these numbers into a matrix
## The columns will correspond to the iteration and the rows will equal the sample size n
data=matrix(y,nr=n,nc=iter,byrow=TRUE)
## apply the function mean to the columns (2) of the matrix
## these are placed in a vector w
w=apply(data,2,mean)
## We will make a histogram of the values in w
## How high should we make y axis?
## All the values used to make a histogram are placed in param (nothing is plotted yet)
param=hist(w,plot=FALSE)
## Since the histogram will be a density plot we will find the max density
ymax=max(param$density)
## To be on the safe side we will add 10% more to this
ymax=1.1*ymax
## Make a suitable layout for graphing
layout(matrix(c(1,1,2,3),nr=2,nc=2, byrow=TRUE))
## Now we can make the histogram
hist(w,freq=FALSE, ylim=c(0,ymax), col=rainbow(max(w)),
main=paste("Histogram of sample mean","\n", "sample size= ",n," iter=",iter," lambda=",lambda,sep=""),
xlab="Sample mean",...)
## add a density curve made from the sample distribution
#lines(density(w),col="Blue",lwd=3) # add a density plot
## Add a theoretical normal curve
curve(dnorm(x,mean=lambda,sd=sqrt(lambda/n)),add=TRUE,col="Red",lty=2,lwd=3) # add a theoretical curve
# Now make a new plot
# Since y is discrete we should use a barplot
barplot(table(y)/(n*iter),col=rainbow(max(y)), main="Barplot of sampled y", ylab ="Rel. Freq",xlab="y" )
x=0:max(y)
plot(x,dpois(x,lambda=lambda),type="h",lwd=5,col=rainbow(max(y)),
main="Probability function for Poisson", ylab="Probability",xlab="y")
}
mycltp(n=2, iter=10000,lambda=4)
mycltp(n=3,iter=10000,lambda=4)
mycltp(n=5,iter=10000,lambda=4)
mycltp(n=10,iter=10000,lambda=4)
mycltp(n=20,iter=10000,lambda=4)
mycltp(n=2, iter=10000,lambda=10)
mycltp(n=3,iter=10000,lambda=10)
mycltp(n=5,iter=10000,lambda=10)
mycltp(n=10,iter=10000,lambda=10)
mycltp(n=20,iter=10000,lambda=10)
Task video
SPRING244753wise0073::myclt(1,2)
## [1] 4.204648 4.381574